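'''
XPath quick reference (examples):

  //div[@class="s_position_list"]
      every div whose class attribute is exactly "s_position_list"
  //div[1]
      every div that is the first div child of its parent
  //div[last()]
      every div that is the last div child of its parent
  //div[position() > 3]
      every div that is the fourth or later div child of its parent
  //div[contains(@class, 'fl')]
      every div whose class attribute contains the substring 'fl'
'''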

from lxml import etree

# Sample HTML snippet (a login form) that parse_text() feeds to etree.HTML.
text = '''
  <form method="post" id="loginForm" class="login-form" action="http://www.renren.com/PLogin.do">
  <dl class="top clearfix">
  <dd style="border-color: rgb(173, 182, 201);">
  <input type="text" name="email" class="input-text" id="email" tabindex="1" value="" style="color: rgb(51, 51, 51);">
  </dd>
  </dl>
  <dl class="pwd clearfix">
  <dd style="border-color: rgb(142, 150, 161);">
  <input type="password" id="password" name="password" error="请输入密码" class="input-text" tabindex="2">
  <label class="pwdtip" id="pwdTip" for="password" style="visibility: hidden;">请输入密码</label>
  <a class="forgetPwd" id="forgetPwd" href="http://safe.renren.com/findPass.do" stats="home_findpassword">忘记密码？</a>
  </dd>
  </dl>
  <div class="caps-lock-tips" id="capsLockMessage" style="display:none"></div>
  <dl class="savepassword clearfix">
  <dt>
  <label title="为了确保您的信息安全，请不要在网吧或者公共机房勾选此项！" for="autoLogin" class="labelCheckbox">
  <input type="checkbox" name="autoLogin" id="autoLogin" value="true" tabindex="4">下次自动登录
  </label>
  </dt>
  <dd>
  <span class="getpassword" id="getpassword"><a href="http://safe.renren.com/findPass.do" stats="home_findpassword">忘记密码？</a></span>
  </dd>
  </dl>
  </form>
'''

def parse_text():
  """Parse the module-level ``text`` HTML snippet and print the result.

  The snippet is parsed with ``etree.HTML``, serialized back to UTF-8
  bytes with ``etree.tostring`` and decoded to ``str`` before printing.
  Previously the serialized string was computed and then discarded, so
  the function had no visible effect; it now prints the string, matching
  what ``parse_file`` does for file input.
  """
  htmlElement = etree.HTML(text)
  # tostring() returns bytes when an encoding is given; decode so the
  # non-ASCII text is printed as characters rather than as a bytes repr.
  htmlstring = etree.tostring(htmlElement, encoding='utf-8').decode('utf-8')
  print(htmlstring)


# Read HTML from a file.
def parse_file(path=r"E:\index.html"):
  """Parse an HTML file with lxml's HTML parser and print the serialized tree.

  Args:
    path: Location of the HTML file to parse. Defaults to the location
      that was previously hard-coded (index.html in the root of drive E:),
      so existing zero-argument calls behave as before.
  """
  # The default path is a raw string: in a normal string literal the
  # backslash followed by "i" is an invalid escape sequence, which newer
  # Python versions warn about.
  # An explicit HTMLParser is passed so the file is read as UTF-8 HTML
  # instead of going through etree.parse's default parser.
  parser = etree.HTMLParser(encoding='utf-8')
  htmlElement = etree.parse(path, parser=parser)
  print(etree.tostring(htmlElement, encoding='utf-8').decode('utf-8'))

# Run the file-parsing demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
  parse_file()




